/*************************************************************************/
/*								 	 */
/*    Central tree-forming algorithm incorporating all criteria  	 */
/*    ---------------------------------------------------------	 	 */
/*								 	 */
/*************************************************************************/


#include "defns.i"
#include "types.i"
#include "extern.i"


ItemCount
	*Weight,	/* Weight[i]  = current fraction of item i */
	**Freq,		/* Freq[x][c] = no. items of class c with outcome x */
	*ValFreq,	/* ValFreq[x]   = no. items with outcome x */
	*ClassFreq;	/* ClassFreq[c] = no. items of class c */

float
	
	*Bar,		/* Bar[a]  = best threshold for contin att a */
	*UnknownRate;	/* UnknownRate[a] = current unknown rate for att a */

Boolean
	*Tested;	/* Tested[a] set if att a has already been tested */
	



/*************************************************************************/
/*								 	 */
/*		Allocate space for tree tables			 	 */
/*								 	 */
/*************************************************************************/


    InitialiseTreeData()
/*  ------------------  */
{ 
    DiscrValue v;
    Attribute a;
    short k;
    int j;

    /********************************************************/

    Tested	= (char *) calloc(MaxAtt+1, sizeof(char));



    Bar		= (float *) calloc(MaxAtt+1, sizeof(float));



    Weight = (ItemCount *) calloc(MaxItem+1, sizeof(ItemCount));

    Freq  = (ItemCount **) calloc(MaxDiscrVal+1, sizeof(ItemCount *));
    ForEach(v, 0, MaxDiscrVal)
    {
	Freq[v]  = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));
    }

    ValFreq = (ItemCount *) calloc(MaxDiscrVal+1, sizeof(ItemCount));
    ClassFreq = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));


    UnknownRate = (float *) calloc(MaxAtt+1, sizeof(float));


}



/*************************************************************************/
/*								 	 */
/*		Initialise the weight of each item		 	 */
/*								 	 */
/*************************************************************************/


    InitialiseWeights()
/*  -----------------  */
{
    ItemNo i;

    ForEach(i, 0, MaxItem)
    {
        Weight[i] = 1.0;
    }
}

/*****************************************************************/
/* A function used to randomly select an attribute at a node. */
/* specificly, select the index of an attribute and the selected*/
/* attribute will be tested at the node*/
/***************************************************************/


int att_index()
  
{
   int index;

   index = rand() % (1+MaxAtt);

   return index;
}

/******************************************/
/*determine the index of continuous variable*/
/* added by kun zhang*/

ItemNo cont_att_index(low,high)
   ItemNo low;
   ItemNo high;
{
  
   int index;
   index=(rand()%(high-low+1))+low;
   return index;
}

/*************************************************************************/
/*                                                                	 */
/*  Change a leaf into a test on a continuous attribute           	 */
/*                                                                	 */
/*************************************************************************/


    ContinTest(Node, Att)
/*  ----------  */
    Tree Node;
    Attribute Att;
{
    float Thresh;
    ItemCount CountItems();

    Sprout(Node, 2);

    Thresh = Bar[Att];

    Node->NodeType	= ThreshContin;
    Node->Tested	= Att;
    Node->Cut		=
    Node->Lower		=
    Node->Upper		= Thresh;
    Node->Errors        = 0;
}



/*************************************************************************/
/*								  	 */
/*  Continuous attributes are treated as if they have possible values	 */
/*	0 (unknown), 1 (less than cut), 2(greater than cut)	  	 */
/*  This routine finds the best cut for items Fp through Lp and sets	 */
/*  Info[], Gain[] and Bar[]						 */
/*								  	 */
/*************************************************************************/


Boolean EvalContinuousAtt(Att, Fp, Lp)
/*  -----------------  */ 
    Attribute Att;
    ItemNo Fp, Lp; 
//ItemNo *UsedIndex;
    
{ 
    ItemNo i,j, BestI1,BestI2, Xp, Tries=0;
    ItemCount Items, KnownItems, LowItems, MinSplit, CountItems();
    ClassNo c;
    float AvGain=0, Val, BestVal, BaseInfo, ThreshCost;
    void Swap();
    
    
    /* in contin.c function ContinTest(), the real thresh is actually*
    /* the greatest value below the value stored in Bar[Att]*/
    
    float RealThresh=0.0;
   
    short TryTimes; /* used to force exit the while loop which */
                    /* is used to search for the thresh value of this contin variable*/

    
    Verbosity(2) printf("\tAtt %s", AttName[Att]);
    Verbosity(3) printf("\n");

    ResetFreq(2);

    /*  Omit and count unknown values */
   
    Items = CountItems(Fp, Lp);
    Xp = Fp;
    ForEach(i, Fp, Lp)
    {
	if ( CVal(Item[i],Att) == Unknown )
	{
	    Freq[ 0 ][ Class(Item[i]) ] += Weight[i];
	    Swap(Xp, i);
	    Xp++;
	}
    }

    ValFreq[0] = 0;
    ForEach(c, 0, MaxClass)
    {
	ValFreq[0] += Freq[0][c];
    }

    KnownItems = Items - ValFreq[0];
    UnknownRate[Att] = 1.0 - KnownItems / Items;

    /*  Special case when very few known values  */

    if ( KnownItems < 2 * MINOBJS )
    {
	Verbosity(2) printf("\tinsufficient cases with known values\n");
	
	return false;
    }


    Quicksort(Xp,Lp,Att,Swap);
    BestI1 = cont_att_index(Xp, Lp);
    BestI2 = cont_att_index(Xp, Lp);
    
    TryTimes=0;
    while(CVal(Item[BestI1],Att)==CVal(Item[BestI2],Att))
      {
	
	BestI2 = cont_att_index(Xp, Lp);
	TryTimes ++;
	if(TryTimes > 20)
	  return false;
      }
    Bar[Att] = (CVal(Item[BestI1],Att)+CVal(Item[BestI2],Att))/2.0;
    /*  
    else{
	 BestI = cont_att_index(Xp, Lp);
	
	 TryTimes=0;
	 while(TestedConIndex[Att][BestI]!=0)
	     {
	       BestI = cont_att_index(Xp, Lp);
	       
		 TryTimes ++;
		
		 if(TryTimes > 20)
		   return false;
	     }
	 *UsedIndex=BestI;
	 Bar[Att] = CVal(Item[BestI],Att);
	 
	 	 TestedConIndex[Att][BestI]++;
    }

*/
    Verbosity(2) printf("\t cut=%f\n",Bar[Att]);/*the cut value stored in Bar*/
    return true;
}



/*************************************************************************/
/*									 */
/*  Construct and return a node for a test on a discrete attribute	 */
/*									 */
/*************************************************************************/


    DiscreteTest(Node, Att)
/*  ----------  */
    Tree Node;
    Attribute Att;
{
    ItemCount CountItems();

    Sprout(Node, MaxAttVal[Att]);

    Node->NodeType	= BrDiscr;
    Node->Tested	= Att;
    Node->Errors	= 0;
}



/*************************************************************************/
/*								 	 */
/*  Build a decision tree for the cases Fp through Lp:		 	 */
/*								 	 */
/*  - if all cases are of the same class, the tree is a leaf and so	 */
/*      the leaf is returned labelled with this class		 	 */
/*								 	 */
/*  - for each attribute, calculate the potential information provided 	 */
/*	by a test on the attribute (based on the probabilities of each	 */
/*	case having a particular value for the attribute), and the gain	 */
/*	in information that would result from a test on the attribute	 */
/*	(based on the probabilities of each case with a particular	 */
/*	value for the attribute being of a particular class)		 */
/*								 	 */
/*  - on the basis of these figures, and depending on the current	 */
/*	selection criterion, find the best attribute to branch on. 	 */
/*	Note:  this version will not allow a split on an attribute	 */
/*	unless two or more subsets have at least MINOBJS items. 	 */
/*								 	 */
/*  - try branching and test whether better than forming a leaf	 	 */
/*								 	 */
/*************************************************************************/


Tree FormTree(Fp, Lp,DepthCounter)
/*   ---------  */
    ItemNo Fp, Lp; 
    short DepthCounter;
{ 
    ItemNo i, Kp, Ep, Group();
    ItemCount Cases, NoBestClass, KnownCases, CountItems();
    float Factor, BestVal, Val, AvGain=0, Worth();
    Attribute Att, BestAtt, Possible=0;
    ClassNo c, BestClass;
    Tree Node, Leaf();
    DiscrValue v;
    Boolean PrevAllKnown;
    int a, j,try;   

    Cases = CountItems(Fp, Lp);

    /*  Generate the class frequency distribution  */

    ForEach(c, 0, MaxClass)
    {
	ClassFreq[c] = 0;
    }
    ForEach(i, Fp, Lp)
    { 
	ClassFreq[ Class(Item[i]) ] += Weight[i];
    } 

    /*  Find the most frequent class  */

    BestClass = 0;
    ForEach(c, 0, MaxClass)
    {
	if ( ClassFreq[c] > ClassFreq[BestClass] )
	{
	    BestClass = c;
	}
    }
    NoBestClass = ClassFreq[BestClass];

    Node = Leaf(ClassFreq, BestClass, Cases, Cases - NoBestClass);

    if(!FullTree){
     if ( DepthCounter >=(RDT_depth))
    {
        return Node;
    }
    }
    
    /*  If all cases are of the same class or there are not enough
	cases to divide, the tree is a leaf  */

    if ( NoBestClass == Cases  || Cases < 2 * MINOBJS )
    { 
	return Node;
    } 

    Verbosity(1)
    	printf("\n%d items, total weight %.1f\n", Lp - Fp + 1, Cases);

    BestAtt=(short)att_index();
    try=0;
    while ( SpecialStatus[BestAtt] == IGNORE || (MaxAttVal[BestAtt] && Tested[BestAtt]) ||(!MaxAttVal[BestAtt] && !EvalContinuousAtt(BestAtt,Fp,Lp))) 
	{   
	    BestAtt=(short)att_index();
	    /* printf("BestAtt : %d\n",BestAtt);*/
	    try++;
	    if(try>20)
	      return Node;

	}
   

    
    /*  Decide whether to branch or not  */ 

    if ( BestAtt != None )
    { 
	Verbosity(1)
	{
	    printf("\tbest attribute %s", AttName[BestAtt]);
	    if ( ! MaxAttVal[BestAtt] )
	    {
		printf(" cut %.3f", Bar[BestAtt]);
	    }
	    
	}	

	/*  Build a node of the selected test  */

        if ( MaxAttVal[BestAtt] )
	    /*  Discrete valued attribute  */
	    DiscreteTest(Node, BestAtt);
    
	else
	    /*  Continuous attribute  */
	    ContinTest(Node, BestAtt);
	

	/*  Remove unknown attribute values  */

	PrevAllKnown = AllKnown;

	Kp = Group(0, Fp, Lp, Node) + 1;
	if ( Kp != Fp ) AllKnown = false;
	KnownCases = Cases - CountItems(Fp, Kp-1);
	UnknownRate[BestAtt] = (Cases - KnownCases) / (Cases + 0.001);

	Verbosity(1)
	{
	    if ( UnknownRate[BestAtt] > 0 )
	    {
		printf("\tunknown rate for %s = %.3f\n",
		       AttName[BestAtt], UnknownRate[BestAtt]);
	    }
	}

	/*  Recursive divide and conquer  */

	++Tested[BestAtt];

	Ep = Kp - 1;
	Node->Errors = 0;

	ForEach(v, 1, Node->Forks)
	{
	    Ep = Group(v, Kp, Lp, Node);

	    if ( Kp <= Ep )
	    {
		Factor = CountItems(Kp, Ep) / KnownCases;

		ForEach(i, Fp, Kp-1)
		{
		    Weight[i] *= Factor;
		}

		Node->Branch[v] = FormTree(Fp, Ep,DepthCounter+1);
		Node->Errors += Node->Branch[v]->Errors;

		Group(0, Fp, Ep, Node);
		ForEach(i, Fp, Kp-1)
		{
		    Weight[i] /= Factor;
		}
	    }
	    else
	    {
		Node->Branch[v] = Leaf(Node->ClassDist, BestClass, 0.0, 0.0);
	    }
	}

	--Tested[BestAtt];

	AllKnown = PrevAllKnown;

	/*  See whether we would have been no worse off with a leaf  */
	if(FullTree)
	  {
	    if ( Node->Errors >= Cases - NoBestClass - Epsilon )
	      { 
		Verbosity(1)
		  printf("Collapse tree for %d items to leaf %s\n",
			 Lp - Fp + 1, ClassName[BestClass]);

		Node->NodeType = 0;
	      }
	  }
       
    }
    else
    { 
	Verbosity(1)
	    printf("\tno sensible splits  %.1f/%.1f\n",
		   Cases, Cases - NoBestClass);
    } 

    return Node; 
} 



/*************************************************************************/
/*								 	 */
/*  Group together the items corresponding to branch V of a test 	 */
/*  and return the index of the last such			 	 */
/*								 	 */
/*  Note: if V equals zero, group the unknown values		 	 */
/*								 	 */
/*************************************************************************/


ItemNo Group(V, Fp, Lp, TestNode)
/*     -----  */
    DiscrValue V;
    ItemNo Fp, Lp;
    Tree TestNode;
{
    ItemNo i;
    Attribute Att;
    float Thresh;
    Set SS;
    void Swap();

    Att = TestNode->Tested;

    if ( V )
    {
	/*  Group items on the value of attribute Att, and depending
	    on the type of branch  */

	switch ( TestNode->NodeType )
	{
	    case BrDiscr:

		ForEach(i, Fp, Lp)
		{
		    if ( DVal(Item[i], Att) == V ) Swap(Fp++, i);
		}
		break;

	    case ThreshContin:

		Thresh = TestNode->Cut;
		ForEach(i, Fp, Lp)
		{
		    if ( (CVal(Item[i], Att) <= Thresh) == (V == 1) ) Swap(Fp++, i);
		}
		break;

	    case BrSubset:

		SS = TestNode->Subset[V];
		ForEach(i, Fp, Lp)
		{
		    if ( In(DVal(Item[i], Att), SS) ) Swap(Fp++, i);
		}
		break;
	}
    }
    else
    {
	/*  Group together unknown values  */

	switch ( TestNode->NodeType )
	{
	    case BrDiscr:
	    case BrSubset:

		ForEach(i, Fp, Lp)
		{
		    if ( ! DVal(Item[i], Att) ) Swap(Fp++, i);
		}
		break;

	    case ThreshContin:

		ForEach(i, Fp, Lp)
		{
		    if ( CVal(Item[i], Att) == Unknown ) Swap(Fp++, i);
		}
		break;
	}
    }

    return Fp - 1;
}



/*************************************************************************/
/*								 	 */
/*	Return the total weight of items from Fp to Lp		 	 */
/*								 	 */
/*************************************************************************/


ItemCount CountItems(Fp, Lp)
/*        ----------  */
    ItemNo Fp, Lp;
{
    register ItemCount Sum=0.0, *Wt, *LWt;

    if ( AllKnown ) return Lp - Fp + 1;

    for ( Wt = Weight + Fp, LWt = Weight + Lp ; Wt <= LWt ; )
    {
	Sum += *Wt++;
    }

    return Sum;
}



/*************************************************************************/
/*                                                               	 */
/*		Exchange items at a and b			 	 */
/*									 */
/*************************************************************************/


void Swap(a,b)
/*   ----  */
    ItemNo a, b;
{
    register Description Hold;
    register ItemCount HoldW;

    Hold = Item[a];
    Item[a] = Item[b];
    Item[b] = Hold;

    HoldW = Weight[a];
    Weight[a] = Weight[b];
    Weight[b] = HoldW;
}
